library(tidyr)
library(dplyr)
library(lubridate)
library(zoo)
library(ggplot2)
library(limma)
library(ggpubr)
library(grid)
library(plotly)

#Data Files and prep work
source("../../lib/DataProccess.R")
source("../../lib/NormFuncs.R")
source("../../lib/OutlierDetectionFuncs.R")
source("../../lib/DataPathName.R")
BaseDir <- params$BaseDir#get the root of the directory where the data is stored

“Files Used:”

COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv

RankingDF <- LIMSFullDF%>%
  filter(Date<mdy("10/31/2021"))%>%
  group_by(Site)%>%
  arrange(Date)%>%
  mutate(N1 = ifelse(!is.na(N1),N1,0))%>%
  mutate(N2 = ifelse(!is.na(N2),N2,0))%>%
  mutate(N1RankLeft = rank(desc(N1 - lag(N1))),
         N1RankRight = rank(desc(N1 - lead(N1))),
         N2RankLeft = rank(desc(N2 - lag(N2))),
         N2RankRight = rank(desc(N2 - lead(N2))))%>%
  select(Date,Site,N1RankLeft,N1RankRight,N2RankLeft,N2RankRight,N1,N2)%>%
  mutate(MaxN1Rank = -pmax(N1RankLeft,N1RankRight,N2RankLeft,N2RankRight))


BreakUsed <- c(.90,.95)
Vec <- PlotlyView(RankingDF, "MaxN1Rank", QuintileBound, Threshold = BreakUsed)
Vec[[1]]
Vec[[2]]
RankingDF%>%
  filter(-MaxN1Rank<=10)%>%
  arrange(Date,Site)
    #Instead Max 10, Max 20
  #Rolling SD Vs Rolling mean
  
  #1) Show everthing for clear outliers
  #2) Show soft with 25
Vec[[3]]%>%
  arrange(FlagScheme)%>%
  #select(-)%>%
  rename(ErrorLevel = FlagScheme, Rank = `MaxN1Rank`)%>%
  mutate(Rank = -Rank)%>%
  arrange(ErrorLevel, Rank)#%>%
  #write.csv("RmdOutput/PossibleOutliers.csv")


LIMSFullDF%>%
  #filter(Date<mdy("10/31/2021"))%>%
  #group_by(Site)%>%
  arrange(Date)%>%
  mutate(N1 = ifelse(!is.na(N1),N1,0))%>%
  mutate(N2 = ifelse(!is.na(N2),N2,0))%>%
  mutate(N1RankLeft = rank(desc(N1 - lag(N1))),
         N1RankRight = rank(desc(N1 - lead(N1))),
         N2RankLeft = rank(desc(N2 - lag(N2))),
         N2RankRight = rank(desc(N2 - lead(N2))))%>%
  select(Date,Site,N1RankLeft,N1RankRight,N2RankLeft,N2RankRight,N1,N2)%>%
  mutate(MaxN1Rank = -pmax(N1RankLeft,N1RankRight,N2RankLeft,N2RankRight))%>%
  write.csv("RmdOutput/PossibleOutliersFullGrouped.csv")
ThresholdFlagDF <- RankingDF%>%
  mutate(ThresholdOutlier= -MaxN1Rank<=4)%>%
  arrange(Date,Site)%>%
    ggplot(aes(x = Date))+
    theme(panel.spacing.y = unit(2, "lines"))+
    geom_point(aes(y = N1, color = Site, shape = "N1", info = -MaxN1Rank,
                   alpha = ThresholdOutlier, size = ThresholdOutlier))+
  geom_point(aes(y = N2, color = Site, shape = "N2", info = -MaxN1Rank,
                   alpha = ThresholdOutlier, size = ThresholdOutlier))
  GGPlotlyWithScaling(ThresholdFlagDF,2, Display = c("N1","N2", "Date", "info"))
ThresholdFlagDF <- RankingDF%>%
  mutate(MaxN1Rank = -MaxN1Rank, ThresholdOutlier = ifelse(MaxN1Rank<=47,"BFlagged","ANotFlagged"),
         ThresholdOutlier= ifelse(MaxN1Rank<=12, "CFlaggedRestrictive", ThresholdOutlier))%>%
  arrange(Date,Site)%>%
    ggplot(aes(x = Date))+
    theme(panel.spacing.y = unit(1, "lines"))+
    geom_point(aes(y = N1, color = ThresholdOutlier, shape = "N1",
                   alpha = ThresholdOutlier, size = ThresholdOutlier, info = MaxN1Rank))+
  geom_point(aes(y = N2, color = ThresholdOutlier, shape = "N2",
                   alpha = ThresholdOutlier, size = ThresholdOutlier, info = MaxN1Rank))+
  facet_wrap(~Site, ncol = 1, scales = "free")
#
  GGPlotlyWithScaling(ThresholdFlagDF,3, Display = c("N1","N2", "Date", "info"))
DoRanking <- function(DF,IsGrouped,isFiltered){
  UsedDF <- DF%>%
    filter(!is.na(N1),!is.na(N2))
  if(IsGrouped){
    UsedDF <- UsedDF%>%
      group_by(Site)
  }
  if(isFiltered){
    UsedDF <- UsedDF%>%
      filter(Date<mdy("10/31/2021"))
  }
  RetDF <- UsedDF%>%
    arrange(Date)%>%
    mutate(N1 = ifelse(!is.na(N1),N1,0))%>%
    mutate(N2 = ifelse(!is.na(N2),N2,0))%>%
    mutate(N1RankLeft = rank(desc(N1 - lag(N1))),
         N1RankRight = rank(desc(N1 - lead(N1))),
         N2RankLeft = rank(desc(N2 - lag(N2))),
         N2RankRight = rank(desc(N2 - lead(N2))))%>%
    select(Date,Site,N1RankLeft,N1RankRight,N2RankLeft,N2RankRight,N1,N2)%>%
    mutate(MaxN1Rank = pmax(N1RankLeft,N1RankRight,N2RankLeft,N2RankRight))%>%
    select(Date,Site,MaxN1Rank,N1,N2)
  return(RetDF)
}

RankingDFUngroupedEarlyDay <- DoRanking(LIMSFullDF,FALSE,TRUE)
RankingDFgroupedEarlyDay <- DoRanking(LIMSFullDF,TRUE,TRUE)
RankingDFUngrouped <- DoRanking(LIMSFullDF,FALSE,FALSE)
RankingDFgrouped <- DoRanking(LIMSFullDF,TRUE,FALSE)

JoinDFFromList <- function(DFList, ByOptions, Names,CombVec){
  FullDF <- full_join(DFList[[1]],DFList[[2]], by = ByOptions)
  for (i in 3:length(DFList)){
    FullDF <- full_join(FullDF,DFList[[i]], by = ByOptions)
  }
  for(i in 1:length(DFList)){
    if(i %% 2 == 1){
      Prep = paste(CombVec, paste(rep(".x",(i+1)%/%2), collapse = ""), sep = "")
    }else{
      Prep = paste(CombVec, paste(rep(".y",(i+1)%/%2), collapse = ""), sep = "")
    }
    FullDF <- FullDF%>%
      rename(!!Names[[i]] := !!sym(Prep))
  }
  return(FullDF)
}

ToMergeDFs <- list(RankingDFUngroupedEarlyDay, RankingDFgroupedEarlyDay, RankingDFUngrouped, RankingDFgrouped)
RenameLists <- list("EarlyFull", "EarlySite", "AllFull", "AllSite")

MergedDF <- JoinDFFromList(
  ToMergeDFs, ByOptions = c("Date","Site","N1","N2"), Names = RenameLists, CombVec = "MaxN1Rank")
MergedDF
GPlotly <- MergedDF%>%
  ggplot()+
    geom_point(aes(x = EarlySite, shape = Site, y = EarlyFull,color = "Filtered"))+
    geom_point(aes(x = AllSite, shape = Site, y = AllFull,color = "Full"))#+
  #geom_abline()
ggplotly(GPlotly)
A <- RankingDFUngroupedEarlyDay%>%
  filter(MaxN1Rank<=12)%>%
  arrange((MaxN1Rank))


B <- RankingDFgroupedEarlyDay%>%
  filter(MaxN1Rank<=2)%>%
  arrange((MaxN1Rank))

C <- full_join(A,B, by=c("Date","Site","N1","N2"))%>%
  #select(-N1,-N2)%>%
  arrange(Site)


G <- C%>%
  mutate(Catagory = ifelse(is.na(MaxN1Rank.x),"Grouped",ifelse(is.na(MaxN1Rank.y),"UnGrouped","Both")))%>%
  full_join(LIMSFullDF)%>%
  mutate(Catagory = ifelse(is.na(Catagory),"NotFlagged",Catagory))%>%
  ggplot()+
  geom_point(aes(x=Date,y=N1,color = Catagory), size = .75)+
  facet_wrap(~Site)
ggplotly(G)
GGPlotlyWithScaling <- function(GGPlotObject, FactorSize, Display = c("Date","N1","N2", "info")){
  AlphaScalling <- scale_alpha_manual(values = seq(.3,1, length.out = FactorSize))
  SizeScalling <- scale_size_manual(values = seq(.7,2, length.out = FactorSize))
  #ColorScalling <- scale_color_manual(values = 
  #                                c("#fee391", "#fec44f", "#fe9929",
  #                                "#d95f0e", "#993404", "#6f2f04")[(7-FactorSize):6])
  PlotlyObject <- GGPlotObject+
    AlphaScalling+
    SizeScalling#+
    #ColorScalling
  return(ggplotly(PlotlyObject, tooltip = Display))
}


ThresholdingLabel <- function(X,ThresholdC){
  A = 0
  for (i in 1:length(ThresholdC)){
    A = A + (X<=ThresholdC[i])

  }
  return(factor(A))
}

ThresholdGenDF <- function(DF, SysUsed ,Thresholds){
  ThresholdFlagDF <- DF%>%
    filter(!is.na(!!sym(SysUsed)))%>%
    mutate(ThresholdOutlier= ThresholdingLabel(!!sym(SysUsed),Thresholds))%>%
    arrange(Date,Site)
  
  return(ThresholdFlagDF)
}

ThresholdingPloting <- function(DF, SysUsed=""){
  DF%>%
    filter(!is.na(ThresholdOutlier))%>%
    ggplot(aes(x = Date))+
    theme(panel.spacing.y = unit(2, "lines"))+
    geom_point(aes(y = N1, color = Site, shape = "N1", info = !!sym(SysUsed),
                   alpha = ThresholdOutlier, size = ThresholdOutlier))+
    geom_point(aes(y = N2, color = Site, shape = "N2", info = !!sym(SysUsed),
                   alpha = ThresholdOutlier, size = ThresholdOutlier))
}


#MergedDF

#"EarlyFull", "EarlySite", "AllFull", "AllSite"
Thresh <- c(4, 11, 15)
UsedDF <- ThresholdGenDF(MergedDF, "EarlySite", Thresh)
UsedDF%>%
  dplyr::filter(as.integer(ThresholdOutlier)>0)
PlotedGraphic <- ThresholdingPloting(UsedDF, SysUsed = "EarlySite")+
  facet_wrap(~Site, ncol = 1, scales = "free")

GGPlotlyWithScaling(PlotedGraphic, length(Thresh)+1)